suppressPackageStartupMessages({
library(data.table)
library(DESeq2)
library(gplots)
library(here)
library(hyperSpec)
library(RColorBrewer)
library(tidyverse)
library(VennDiagram)
library(emoji)
library(ggVennDiagram)
})suppressMessages({
source(here("UPSCb-common/Rtoolbox/src/plotEnrichedTreemap.R"))
source(here("UPSCb-common/src/R/featureSelection.R"))
source(here("UPSCb-common/src/R/volcanoPlot.R"))
source(here("UPSCb-common/src/R/gopher.R"))
})"line_plot" <- function(dds=dds,vst=vst,gene_id=gene_id){
message(paste("Plotting",gene_id))
sel <- grepl(gene_id,rownames(vst))
stopifnot(sum(sel)==1)
p <- ggplot(bind_cols(as.data.frame(colData(dds)),
data.frame(value=vst[sel,])),
aes(x=Genotype,y=value,fill=Genotype)) +
geom_boxplot() +
geom_jitter(color="black") +
scale_y_continuous(name="VST expression") +
ggtitle(label=paste("Expression for: ",gene_id))
suppressMessages(suppressWarnings(plot(p)))
return(NULL)
}"extract_results" <- function(dds,vst,contrast,
padj=0.01,lfc=0.5,
plot=TRUE,verbose=TRUE,
export=TRUE,default_dir=here("data/analysis/DE"),
default_prefix="DE-",
labels=colnames(dds),
sample_sel=1:ncol(dds),
expression_cutoff=0,
debug=FALSE,filter=c("median",NULL),...){
# get the filter
if(!is.null(match.arg(filter))){
filter <- rowMedians(counts(dds,normalized=TRUE))
message("Using the median normalized counts as default, set filter=NULL to revert to using the mean")
}
# validation
if(length(contrast)==1){
res <- results(dds,name=contrast,filter = filter)
} else {
res <- results(dds,contrast=contrast,filter = filter)
}
stopifnot(length(sample_sel)==ncol(vst))
if(plot){
par(mar=c(5,5,5,5))
volcanoPlot(res)
par(mar=mar)
}
# a look at independent filtering
if(plot){
plot(metadata(res)$filterNumRej,
type="b", ylab="number of rejections",
xlab="quantiles of filter")
lines(metadata(res)$lo.fit, col="red")
abline(v=metadata(res)$filterTheta)
}
if(verbose){
message(sprintf("The independent filtering cutoff is %s, removing %s of the data",
round(metadata(res)$filterThreshold,digits=5),
names(metadata(res)$filterThreshold)))
max.theta <- metadata(res)$filterNumRej[which.max(metadata(res)$filterNumRej$numRej),"theta"]
message(sprintf("The independent filtering maximises for %s %% of the data, corresponding to a base mean expression of %s (library-size normalised read)",
round(max.theta*100,digits=5),
round(quantile(counts(dds,normalized=TRUE),probs=max.theta),digits=5)))
}
if(plot){
qtl.exp=quantile(counts(dds,normalized=TRUE),probs=metadata(res)$filterNumRej$theta)
dat <- data.frame(thetas=metadata(res)$filterNumRej$theta,
qtl.exp=qtl.exp,
number.degs=sapply(lapply(qtl.exp,function(qe){
res$padj <= padj & abs(res$log2FoldChange) >= lfc &
! is.na(res$padj) & res$baseMean >= qe
}),sum))
if(debug){
plot(ggplot(dat,aes(x=thetas,y=qtl.exp)) +
geom_line() + geom_point() +
scale_x_continuous("quantiles of expression") +
scale_y_continuous("base mean expression") +
geom_hline(yintercept=expression_cutoff,
linetype="dotted",col="red"))
p <- ggplot(dat,aes(x=thetas,y=qtl.exp)) +
geom_line() + geom_point() +
scale_x_continuous("quantiles of expression") +
scale_y_log10("base mean expression") +
geom_hline(yintercept=expression_cutoff,
linetype="dotted",col="red")
suppressMessages(suppressWarnings(plot(p)))
plot(ggplot(dat,aes(x=thetas,y=number.degs)) +
geom_line() + geom_point() +
geom_hline(yintercept=dat$number.degs[1],linetype="dashed") +
scale_x_continuous("quantiles of expression") +
scale_y_continuous("Number of DE genes"))
plot(ggplot(dat,aes(x=thetas,y=number.degs[1] - number.degs),aes()) +
geom_line() + geom_point() +
scale_x_continuous("quantiles of expression") +
scale_y_continuous("Cumulative number of DE genes"))
plot(ggplot(data.frame(x=dat$thetas[-1],
y=diff(dat$number.degs[1] - dat$number.degs)),aes(x,y)) +
geom_line() + geom_point() +
scale_x_continuous("quantiles of expression") +
scale_y_continuous("Number of DE genes per interval"))
plot(ggplot(data.frame(x=dat$qtl.exp[-1],
y=diff(dat$number.degs[1] - dat$number.degs)),aes(x,y)) +
geom_line() + geom_point() +
scale_x_continuous("base mean of expression") +
scale_y_continuous("Number of DE genes per interval"))
p <- ggplot(data.frame(x=dat$qtl.exp[-1],
y=diff(dat$number.degs[1] - dat$number.degs)),aes(x,y)) +
geom_line() + geom_point() +
scale_x_log10("base mean of expression") +
scale_y_continuous("Number of DE genes per interval") +
geom_vline(xintercept=expression_cutoff,
linetype="dotted",col="red")
suppressMessages(suppressWarnings(plot(p)))
}
}
sel <- res$padj <= padj & abs(res$log2FoldChange) >= lfc & ! is.na(res$padj) &
res$baseMean >= expression_cutoff
if(verbose){
message(sprintf(paste(
ifelse(sum(sel)==1,
"There is %s gene that is DE",
"There are %s genes that are DE"),
"with the following parameters: FDR <= %s, |log2FC| >= %s, base mean expression > %s"),
sum(sel),padj,
lfc,expression_cutoff))
}
# proceed only if there are DE genes
if(sum(sel) > 0){
val <- rowSums(vst[sel,sample_sel,drop=FALSE])==0
if (sum(val) >0){
warning(sprintf(paste(
ifelse(sum(val)==1,
"There is %s DE gene that has",
"There are %s DE genes that have"),
"no vst expression in the selected samples"),sum(val)))
sel[sel][val] <- FALSE
}
if(export){
if(!dir.exists(default_dir)){
dir.create(default_dir,showWarnings=FALSE,recursive=TRUE,mode="0771")
}
write.csv(res,file=file.path(default_dir,paste0(default_prefix,"results.csv")))
write.csv(res[sel,],file.path(default_dir,paste0(default_prefix,"genes.csv")))
}
if(plot & sum(sel)>1){
heatmap.2(t(scale(t(vst[sel,sample_sel]))),
distfun = pearson.dist,
hclustfun = function(X){hclust(X,method="ward.D2")},
trace="none",col=hpal,labRow = FALSE,
labCol=labels[sample_sel],...
)
}
}
return(list(all=rownames(res[sel,]),
up=rownames(res[sel & res$log2FoldChange > 0,]),
dn=rownames(res[sel & res$log2FoldChange < 0,])))
}extractEnrichmentResults <- function(enrichment,task="go",
diff.exp=c("all","up","dn"),
go.namespace=c("BP","CC","MF"),
genes=NULL,export=TRUE,plot=TRUE,
default_dir=here("data/analysis/DE"),
default_prefix="DE",
url="athaliana"){
# process args
diff.exp <- match.arg(diff.exp)
de <- ifelse(diff.exp=="all","none",
ifelse(diff.exp=="dn","down",diff.exp))
# sanity
if( is.null(enrichment[[task]]) | length(enrichment[[task]]) == 0){
message(paste("No enrichment for",task))
} else {
# write out
if(export){
write_tsv(enrichment[[task]],
file=here(default_dir,
paste0(default_prefix,"-genes_GO-enrichment.tsv")))
if(!is.null(genes)){
write_tsv(
enrichedTermToGenes(genes=genes,terms=enrichment[[task]]$id,url=url,mc.cores=16L),
file=here(default_dir,
paste0(default_prefix,"-enriched-term-to-genes.tsv"))
)
}
}
if(plot){
sapply(go.namespace,function(ns){
titles <- c(BP="Biological Process",
CC="Cellular Component",
MF="Molecular Function")
suppressWarnings(tryCatch({plotEnrichedTreemap(enrichment,enrichment=task,
namespace=ns,
de=de,title=paste(default_prefix,titles[ns]))},
error = function(e) {
message(paste("Treemap plot failed for",ns,
"because of:",e))
return(NULL)
}))
})
}
}
}load(here("analysis/salmon/dds_mergeTechRep.rda"))
dds$Genotype <- relevel(dds$Genotype,ref = "T89")## using 'avgTxLength' from assays(dds), correcting for library size
The targeted locus for line 26 is
Potri.006G174000/Potra001877g14982 (Potra2n6c13821) And for
the line 03, it is Potri.018G096200/Potra001273g10998
(Potra2n6c13821) This is the expression level of targeted gene.
kogene <- "Potra2n6c13821"
stopifnot(kogene %in% rownames(vst))
line_plot(dds=dds,vst=vst,gene_id = kogene)## Plotting Potra2n6c13821
## NULL
| Name | V2 |
|---|---|
| SNRPG | Potra2n6c13472 |
| SNRPB/N | Potra2n11c22477 |
| SNRPD1 | Potra2n5c10994 |
| SNRPD2 | Potra2n14c27408 |
| SNRPD3 | Potra2n2c6364 |
| SNRPF | Potra2n8c17320 |
| SNRPE | Potra2n6c13821 |
| LSM1A-B | Potra2n1c1466 |
| LSM2 | Potra2n4c10338 |
| LSM3A-B | Potra2n5c11140 |
| LSM4 | Potra2n2c4537 |
| LSM6A-B | Potra2n8c17320 |
| LSM7 | Potra2n1c2324 |
| LSM8 | Potra2n1c2419 |
genelist <- goi$V2[!(goi$V2 == "Potra2n6c13821")]
stopifnot(all(genelist %in% rownames(vst)))
dev.null <- lapply(genelist,line_plot,dds=dds,vst=vst)## Plotting Potra2n6c13472
## Plotting Potra2n11c22477
## Plotting Potra2n5c10994
## Plotting Potra2n14c27408
## Plotting Potra2n2c6364
## Plotting Potra2n8c17320
## Plotting Potra2n1c1466
## Plotting Potra2n4c10338
## Plotting Potra2n5c11140
## Plotting Potra2n2c4537
## Plotting Potra2n8c17320
## Plotting Potra2n1c2324
## Plotting Potra2n1c2419
## estimating size factors
## using 'avgTxLength' from assays(dds), correcting for library size
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
Check the different contrasts
## [1] "Intercept" "Genotype_line26_vs_T89" "Genotype_line3_vs_T89"
line26 <- extract_results(dds=dds,vst=vst,contrast="Genotype_line26_vs_T89",
default_dir = here("analysis/DE"),
default_prefix = "Line26_",
labels=dds$BioID,
sample_sel = dds$Genotype %in% c("line26","T89"),
cexCol=.9, plot = TRUE, verbose = TRUE)## Using the median normalized counts as default, set filter=NULL to revert to using the mean
## The independent filtering cutoff is 0.14916, removing 28.82535% of the data
## The independent filtering maximises for 28.82535 % of the data, corresponding to a base mean expression of 0 (library-size normalised read)
## There are 8765 genes that are DE with the following parameters: FDR <= 0.01, |log2FC| >= 0.5, base mean expression > 0
line03 <- extract_results(dds=dds,vst=vst,contrast="Genotype_line3_vs_T89",
default_dir = here("analysis/DE"),
default_prefix = "Line03_",
labels=dds$BioID,
sample_sel = dds$Genotype %in% c("line3","T89"),
cexCol=.9, plot = TRUE, verbose = TRUE)## Using the median normalized counts as default, set filter=NULL to revert to using the mean
## The independent filtering cutoff is 0.14916, removing 28.82535% of the data
## The independent filtering maximises for 28.82535 % of the data, corresponding to a base mean expression of 0 (library-size normalised read)
## There are 8064 genes that are DE with the following parameters: FDR <= 0.01, |log2FC| >= 0.5, base mean expression > 0
ggVennDiagram(lapply(res.list,"[[","all"), label_alpha = 0, label = "count") +
scale_fill_gradient(low="white",high = "dodgerblue") +
scale_x_continuous(expand = expansion(mult = .1))res.list <- list("Line 26"=line26)
# enr.list <- lapply(res.list,function(r){
# lapply(r,gopher,background=background,task="go",url="potra2")
# })
#
# dev.null <- lapply(names(enr.list),function(n){
# lapply(names(enr.list[[n]]),function(de){
# extractEnrichmentResults(enr.list[[n]][[de]],
# diff.exp=de,
# genes=res.list[[n]][[de]],
# default_prefix=paste(n,de,sep="-"),
# url="potra2")
# })
# })res.list <- list("Line 03"=line03)
# enr.list <- lapply(res.list,function(r){
# lapply(r,gopher,background=background,task="go",url="potra2")
# })
#
# dev.null <- lapply(names(enr.list),function(n){
# lapply(names(enr.list[[n]]),function(de){
# extractEnrichmentResults(enr.list[[n]][[de]],
# diff.exp=de,
# genes=res.list[[n]][[de]],
# default_prefix=paste(n,de,sep="-"),
# url="potra2")
# })
# })res.list <- list("CommonLine26andLine03"=list("all"=intersect(line26$all,line03$all),
"up"=intersect(line26$up,line03$up),
"dn"=intersect(line26$dn,line03$dn)))
# enr.list <- lapply(res.list,function(r){
# lapply(r,gopher,background=background,task="go",url="potra2")
# })
#
# dev.null <- lapply(names(enr.list),function(n){
# lapply(names(enr.list[[n]]),function(de){
# extractEnrichmentResults(enr.list[[n]][[de]],
# diff.exp=de,
# genes=res.list[[n]][[de]],
# default_prefix=paste(n,de,sep="-"),
# url="potra2")
# })
# })## R version 4.3.1 (2023-06-16)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.3 LTS
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Etc/UTC
## tzcode source: system (glibc)
##
## attached base packages:
## [1] parallel grid stats4 stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] jsonlite_1.8.7 ggVennDiagram_1.2.3 LSD_4.1-0 vsn_3.70.0
## [5] tximport_1.30.0 pvclust_2.2-0 plotly_4.10.3 pheatmap_1.0.12
## [9] PCAtools_2.14.0 ggrepel_0.9.4 patchwork_1.1.3 magrittr_2.0.3
## [13] gtools_3.9.4 emoji_15.0 limma_3.58.1 VennDiagram_1.7.3
## [17] futile.logger_1.4.3 lubridate_1.9.3 forcats_1.0.0 stringr_1.5.0
## [21] dplyr_1.1.3 purrr_1.0.2 readr_2.1.4 tidyr_1.3.0
## [25] tibble_3.2.1 tidyverse_2.0.0 RColorBrewer_1.1-3 hyperSpec_0.100.0
## [29] xml2_1.3.5 ggplot2_3.4.4 lattice_0.21-8 here_1.0.1
## [33] gplots_3.1.3 DESeq2_1.42.0 SummarizedExperiment_1.32.0 Biobase_2.62.0
## [37] MatrixGenerics_1.14.0 matrixStats_1.1.0 GenomicRanges_1.54.1 GenomeInfoDb_1.38.0
## [41] IRanges_2.36.0 S4Vectors_0.40.1 BiocGenerics_0.48.1 data.table_1.14.8
##
## loaded via a namespace (and not attached):
## [1] rstudioapi_0.15.0 rmarkdown_2.25 farver_2.1.1 zlibbioc_1.48.0
## [5] vctrs_0.6.4 DelayedMatrixStats_1.24.0 RCurl_1.98-1.13 htmltools_0.5.7
## [9] S4Arrays_1.2.0 curl_5.1.0 lambda.r_1.2.4 SparseArray_1.2.1
## [13] sass_0.4.7 bslib_0.5.1 KernSmooth_2.23-22 htmlwidgets_1.6.2
## [17] plyr_1.8.9 testthat_3.2.0 cachem_1.0.8 futile.options_1.0.1
## [21] lifecycle_1.0.4 pkgconfig_2.0.3 rsvd_1.0.5 Matrix_1.6-0
## [25] R6_2.5.1 fastmap_1.1.1 GenomeInfoDbData_1.2.11 digest_0.6.33
## [29] colorspace_2.1-0 rprojroot_2.0.4 dqrng_0.3.1 irlba_2.3.5.1
## [33] beachmat_2.18.0 labeling_0.4.3 fansi_1.0.5 timechange_0.2.0
## [37] httr_1.4.7 abind_1.4-5 compiler_4.3.1 proxy_0.4-27
## [41] bit64_4.0.5 withr_2.5.2 BiocParallel_1.36.0 DBI_1.1.3
## [45] highr_0.10 DelayedArray_0.28.0 classInt_0.4-10 caTools_1.18.2
## [49] units_0.8-4 tools_4.3.1 glue_1.6.2 sf_1.0-14
## [53] reshape2_1.4.4 generics_0.1.3 gtable_0.3.4 tzdb_0.4.0
## [57] class_7.3-22 preprocessCore_1.64.0 hms_1.1.3 BiocSingular_1.18.0
## [61] ScaledMatrix_1.10.0 utf8_1.2.4 XVector_0.42.0 pillar_1.9.0
## [65] vroom_1.6.4 bit_4.0.5 deldir_1.0-9 tidyselect_1.2.0
## [69] locfit_1.5-9.8 knitr_1.45 xfun_0.41 statmod_1.5.0
## [73] brio_1.1.3 stringi_1.7.12 yaml_2.3.7 lazyeval_0.2.2
## [77] evaluate_0.23 codetools_0.2-19 interp_1.1-4 BiocManager_1.30.22
## [81] RVenn_1.1.0 cli_3.6.1 affyio_1.72.0 jquerylib_0.1.4
## [85] munsell_0.5.0 Rcpp_1.0.11 png_0.1-8 latticeExtra_0.6-30
## [89] jpeg_0.1-10 sparseMatrixStats_1.14.0 bitops_1.0-7 viridisLite_0.4.2
## [93] e1071_1.7-13 scales_1.2.1 affy_1.80.0 crayon_1.5.2
## [97] rlang_1.1.2 cowplot_1.1.1 formatR_1.14
Created by Fai Theerarat Kochakarn